# R3.6
reference:Modelling human blastocysts by reprogramming fibroblasts into iBlastoids
# loading R library
#R3.6
rm(list=ls())
rewrite=FALSE
condaENV <- "/home/chenzh/miniconda3/envs/R3.6"
LBpath <- paste0(condaENV ,"/lib/R/library")
.libPaths(LBpath)
suppressMessages(library(dplyr))
suppressMessages(library(tidyr))
suppressMessages(library(ggplot2))
suppressMessages(library(cowplot))
suppressMessages(library(pheatmap))
suppressMessages(library(Seurat))
# working directory
DIR <- "~/My_project/CheckBlastoids"
setwd(DIR)
knitr::opts_knit$set(root.dir=DIR)
Loading R functions
#source("~/PC/R_code/functions.R")
#source("~/PC/SnkM/SgCell.R")
source("src/local.quick.fun.R")
suppressMessages(library(foreach))
suppressMessages(library(doParallel))
numCores <- 10
registerDoParallel(numCores)
options(digits = 4)
options(future.globals.maxSize= 3001289600)
TD="Nov14_2021"
savefile1=paste0("tmp_data/",TD,"/IBD2.Rdata")
savefile2=paste0("tmp_data/",TD,"/IBD2.counts.meta.Rdata")
if (file.exists(savefile1)) {
load(savefile1,verbose = T)
load(savefile2,verbose=T)
}else{
#' loading raw data
load(paste0("tmp_data/",TD,"/all.counts.meta.Rdata"),verbose=T)
load("tmp_data/gene.meta.Rdata",verbose=T)
temp.mk.gene <- list()
temp.mk.gene$IB_Epi <- c("DPPA5","ALPPL2","POU5F1","MT1G","UTF1")
temp.mk.gene$IB_TE <- c("WFDC2","FABP7","KRT19","EZR","TMEM54")
temp.mk.gene$IB_PE <- c("APOA1","APOA2","CKB","APOC1","LCN15")
temp.mk.gene$IB_IIM1 <- c("VIM","CD70","POU5F1B","TUBB2A","GSN")
temp.mk.gene$IB_IM2 <- c("PITX1","RGS5","CTC-276P9.1","LIX1","HGF")
temp.mk.gene$IB_IM3 <- c("IFIT1","IFIT3","CCL5","IFIT2","OASL")
temp.mk.gene$IB_NR <- c("IFI6","HTRA1","DCN","SOD3","NBL1")
# above marker genes are from
#https://static-content.springer.com/esm/art%3A10.1038%2Fs41586-021-03372-y/MediaObjects/41586_2021_3372_MOESM5_ESM.xlsx
#QC
temp.raw <- meta.all %>% filter(pj=="IBD2")
temp.M <- meta.all %>% filter(pj=="IBD2") %>% filter(nGene >2000 & nGene <5000 & mt.perc < 0.125)
counts.filter <- counts.all[setdiff(rownames(counts.all), mt.gene),temp.M$cell]
IBD2.counts <- counts.filter
IBD2.meta <- temp.M
# expressed genes
temp.sel.expG <- rownames(counts.filter)[rowSums(counts.filter[,c(temp.M$cell)] >=1) >5]
data.temp <- CreateSeuratObject(counts.filter[temp.sel.expG,temp.M$cell], meta.data = (temp.M %>% tibble::column_to_rownames("cell"))) %>% NormalizeData(verbose = FALSE) %>% FindVariableFeatures( selection.method = "vst", nfeatures = 2000, verbose = FALSE) %>% ScaleData(verbose=F,vars.to.regress=c("mt.perc"))%>% RunPCA(verbose=F,npcs=20) %>% RunUMAP(dims=1:20,verbose=F,min.dist=0.4) %>% FindNeighbors( dims = 1:20,verbose = FALSE) %>% FindClusters(resolution = 0.2,verbose = FALSE) #"mt.perc",
data.mod <- data.temp %>% RenameIdents(c('0'='IB_IM1','1'='IB_PE','2'='IB_TE','3'='IB_Epi','4'='IB_IM2','5'='IB_PE','6'='IB_IM3','7'='IB_NR'))
#assign the top right corner cells to uncertain cells
uc.cell <- colnames(data.mod)[Idents(data.mod)=="IB_IM3" & data.mod@reductions$umap@cell.embeddings[,1] > 5]
data.mod <- SetIdent(data.mod,cells=uc.cell,value="IB_uc")
table(data.mod %>% Idents())
save(IBD2.counts,IBD2.meta,file=savefile2)
save(data.mod,temp.sel.expG,temp.M,temp.raw,temp.mk.gene,uc.cell,file=savefile1)
}
## Loading objects:
## data.mod
## temp.sel.expG
## temp.M
## temp.raw
## temp.mk.gene
## uc.cell
## Loading objects:
## IBD2.counts
## IBD2.meta
#DR.TPM.filter <- apply(counts.filter,2,function(x){x/sum(x)*1000000})
#DimPlot(data.mod,cells.highlight=uc.cell)
repeat paper’s figure
print("https://www.nature.com/articles/s41586-021-03372-y.pdf fig2C")
## [1] "https://www.nature.com/articles/s41586-021-03372-y.pdf fig2C"
print("marker genes are extrated from https://static-content.springer.com/esm/art%3A10.1038%2Fs41586-021-03372-y/MediaObjects/41586_2021_3372_MOESM5_ESM.xlsx")
## [1] "marker genes are extrated from https://static-content.springer.com/esm/art%3A10.1038%2Fs41586-021-03372-y/MediaObjects/41586_2021_3372_MOESM5_ESM.xlsx"
my new annotation
temp.plot2 <- list()
temp.plot2$id <- DimPlot(data.mod,label=T) +NoAxes()+NoLegend()
## Warning: Using `as.character()` on a quosure is deprecated as of rlang 0.3.0.
## Please use `as_label()` or `as_name()` instead.
## This warning is displayed once per session.
for (g in names(temp.mk.gene)) {
temp.plot2[[g]] <- FeaturePlot(data.mod,temp.mk.gene[[g]][1])+NoAxes()+NoLegend()+ggtitle(paste(g,temp.mk.gene[[g]][1],sep="\n"))+FunTitle()
}
cowplot::plot_grid(plotlist=temp.plot2)
#VlnPlot(data.mod,temp.mk.gene)
print(
temp.raw %>% ggplot +geom_point(mapping=aes(x=mt.perc,y=nGene,col=EML))+geom_vline(xintercept = 0.125,linetype = "dashed" )+geom_hline(yintercept = 2000,linetype = "dashed")+geom_hline(yintercept = 5000,linetype = "dashed")+theme_classic()+ggtitle("QC for 10X")+FunTitle()
)
#temp.M <- meta.all %>% filter(pj=="JPF2019" & EML=="Pos48ELS") %>% filter(nGene >=3200 & nGene <= 6200 & mt.perc < 0.06) %>% bind_rows(meta.all %>% filter(pj=="JPF2019" & EML=="H9AMN") %>% filter(nGene >=3600 & nGene <= 6400 & mt.perc < 0.06))
saving the new meta data
IBD2.meta <- IBD2.meta %>% filter(cell %in% colnames(data.mod))
IBD2.meta$EML <- Idents(data.mod)[IBD2.meta$cell] %>% as.vector()
downsample dataset
temp.out <-split(IBD2.meta,IBD2.meta$EML)
meta.IBD2.further.ds <- temp.out[c("IB_Epi","IB_PE","IB_TE")] %>% lapply(function(x){return(FunMaSF(x,150))}) %>% do.call("bind_rows",.) %>% bind_rows( temp.out[c("IB_IM1","IB_IM2","IB_IM3")] %>% lapply(function(x){return(FunMaSF(x,100))}) %>% do.call("bind_rows",.))
save object
if (!file.exists(paste0("tmp_data/",TD,"/meta.IBD2.further.rds")) | rewrite) {
print("save output")
saveRDS(IBD2.meta,paste0("tmp_data/",TD,"/meta.IBD2.further.rds"))
saveRDS(meta.IBD2.further.ds ,file=paste0("tmp_data/",TD,"/meta.IBD2.further.ds.rds"))
}
temp.plot <- list()
for (g in c("TRIML2","ISL1","STOM","TMEM54","CTSV","GABRP","MSX2","EPAS1","ANXA3","DLX5","HEY1","KRT7","SERPINB9","ATP2B1","TFAP2A" )) {
temp.plot[[g]] <- FeaturePlot(data.mod,g)+NoAxes()+NoLegend()+ggtitle(paste(g))+FunTitle()
}
cowplot::plot_grid(plotlist=temp.plot[1:15])
temp.plot <- list()
for (g in c("GATA3","GATA2","CDX2","KRT18","TEAD3","CGA","CCR7","PDF","GCM1" )) {
temp.plot[[g]] <- FeaturePlot(data.mod,g)+NoAxes()+NoLegend()+ggtitle(paste(g))+FunTitle()
}
cowplot::plot_grid(plotlist=temp.plot)
temp.plot <- list()
for (g in c("CCR7","CGA","CSF3R","CYP19A1","DLX5","GCM1","GREM2","KRT23","MLLT11","MRGPRX1","MUC15","MYO10","NRP1","OVOL1","PGF","PTPRE","S1PR2","SDC1","SERPINB9","SLCO4A1")) {
if (g %in% rownames(data.mod@assays$RNA@counts)) {
temp.plot[[g]] <- FeaturePlot(data.mod,g)+NoAxes()+NoLegend()+ggtitle(paste(g))+FunTitle()
}else{
temp.plot[[g]] <-ggplot() + annotate("text", x = 4, y = 25, size=2, label = c("not expressed(<6)")) + theme_void()+ggtitle(paste(g))+FunTitle()
}
}
cowplot::plot_grid(plotlist=temp.plot)